*BMT Pasireotide project
*2019/01/09
*Yi Ren;

/* Session housekeeping: wipe the Log and Output windows so messages from
   earlier interactive runs are not mixed with this run. */
DM 'CLEAR LOG';
DM 'CLEAR OUTPUT';
/* Assign libref DATA to the project data folder. No later step in the visible
   code reads or writes through this libref. */
libname data 'H:\BMT\Pasireotide\data';

/* ---- Case (treated) patients --------------------------------------------
   Read the case spreadsheet, keep only patients who received pasireotide and
   have a patient id, flag them with pasiro=1, then trim to the analysis
   columns. */
proc import out=case
		datafile='H:\BMT\Pasireotide\data\hari\0103\xase0103.csv'
		dbms=csv replace;
	guessingrows=max;
	getnames=yes;
run;

data case2;
	set case;
	/* drop rows explicitly marked "No" and rows without a patient id */
	if Received_Pasireotide_ ne "No" and Pt_ID ne .;
	pasiro=1;
run;

data case3(keep=Pt_ID2 MRN Transplant_Date Engraftment Age_at_Tp Gender Race Hispanic
		Tp_Dx Status_at_Transplant Donor Transplant_Type Cell_Type Cond_Reg Donor_Type
		Graft_Src TP__ GVHD_ppx1-GVHD_ppx7 TBI pasiro);
	set case2;
run;
/* ---- Control patients: raw import ---------------------------------------
   Read the control spreadsheet and order it so that, within each MRN, the
   most recent transplant date comes first. */
proc import out=control
		datafile='H:\BMT\Pasireotide\data\hari\control0826.csv'
		dbms=csv replace;
	guessingrows=max;
	getnames=yes;
run;

proc sort data=control;
	by MRN descending Transplant_Date;
run;
/* Count the number of control rows (transplants) per MRN.
   ORDER BY is explicit because this table is later combined with control2 in
   a MERGE ... BY MRN, which requires both inputs to be sorted by MRN. GROUP BY
   alone does not guarantee the order of the result rows. */
proc sql;
	create table controlnumber as
	select MRN, count(MRN) as n_TP
	from control
	group by MRN
	order by MRN;
quit;
/* One row per MRN: the first row of each BY group in the current sort order
   of control. */
data control2;
	set control;
	by MRN;
	if not first.MRN then delete;
run;
/* Build the analysis control set: attach the per-MRN transplant count, flag
   rows as controls (pasiro=0), apply the exclusion rule and the graft-source
   filter, and trim to the analysis columns. n_TP is merged in but is not in
   the keep list, so it does not reach the output. */
data control(keep=Pt_ID MRN Transplant_Date Engraftment Age_at_Tp Gender Race Hispanic
		Tp_Dx Status_at_Transplant Donor Transplant_Type Cell_Type Cond_Reg Donor_Type
		Graft_Src TP__ GVHD_Prophy_1-GVHD_Prophy_6 TBI pasiro);
	merge control2 controlnumber;
	by MRN;
	pasiro=0;
	/* Keep a row only when
	     - its graft source is one of the three listed allogeneic sources, and
	     - it is NOT (age 60 or older, given fludarabine, and given TBI or busulfan). */
	if Graft_Src in ("Allo Matched Related" "Allo Matched Unrelated" "Cord Blood")
		and not (Age_at_Tp>=60 and Fludarabine="Yes" and (TBI="Yes" or Busulfan="Yes"));
run;
/* Align the control prophylaxis column names with the case file
   (GVHD_Prophy_n -> GVHD_ppxn) so the two sets can be stacked. */
data control;
	set control;
	rename GVHD_Prophy_1-GVHD_Prophy_6 = GVHD_ppx1-GVHD_ppx6;
run;
*Stack case and control rows into one cohort table;
/* This is a concatenation (SET with two inputs), not a BY-merge. case3 is
   listed first, so the length of every character variable shared by both
   inputs is taken from case3 and longer control values may be truncated.
   NOTE(review): the shortened diagnosis literals and the "Ye" literal in the
   later labeling step look like they depend on this truncation -- confirm
   before changing variable lengths here. */
data cohort;
	set case3 control;
	/* engraftment minus transplant date -- presumably days, assuming both are
	   SAS date values (TODO confirm) */
	d2engraft=Engraftment-Transplant_Date;
run;
/* ---- Attach HCT information ---------------------------------------------
   Import the HCT sheet, reduce it to one row per MRN, and left-join it onto
   the cohort (every cohort row is kept, HCT-only MRNs are dropped). */
proc import out=hct
		datafile='H:\BMT\Pasireotide\data\hari\0203\hct.csv'
		dbms=csv replace;
	guessingrows=max;
	getnames=yes;
run;

proc sort data=cohort;
	by MRN;
run;
proc sort data=hct;
	by MRN;
run;

/* first HCT row per MRN only */
data hct;
	set hct;
	by MRN;
	if not first.mrn then delete;
run;

data cohort2;
	merge cohort(in=from_cohort) hct;
	by MRN;
	if from_cohort;
run;

/* ---- Derived grouping labels --------------------------------------------
   Adds group, disease, race2, hispanic2, conditioning and gvhdp to cohort2. */
data cohortd2;
	set cohort2;
	length group disease conditioning race2 hispanic2 gvhdp $50.;
	format dolc MMDDYY10.;

	/* study arm */
	select (pasiro);
		when (1) group="Treatment";
		when (0) group="Control";
		otherwise;
	end;

	/* Disease category. Several literals are shortened copies of longer
	   diagnosis names so that truncated values also match.
	   NOTE(review): "Aplastic anemia" is in the lymphoma list -- confirm that
	   this grouping is intended. */
	if tp_dx in ("ALL","AML",'Leukemia, unspecified','PLL','T-cell ALL','T-cell PLL') then
		disease="Acute Leukemias";
	else if tp_dx in (
			"Anaplastic large cell lymphoma (T/Null cell primary systemic type)",
			"Angioimmunoblastic T-cell lymphoma",
			"Angioimmunoblastic T-cell lym",
			"B-cell lymphoma, unclass., wi",
			"Aplastic anemia",
			"Burkitt's lymphoma / Burkitt cell leukemia",
			'Diffuse large B-cell lymphoma',
			'Follicular lymphoma',
			"Gray zone lymphoma",
			"Hepatosplenic T-cell lymphoma",
			"Hodgkins lymphoma",
			"Mantle cell lymphoma (MCL)",
			"Mycosis fungoides (Sezary syndrome)",
			"Other B-cell lymphoma, Not Specified",
			"Peripheral T-cell lymphoma",
			"Precursor T-lymphoblastic lymphoma / leukemia",
			"T-cell NHL",
			"Anaplastic large cell lymphom",
			"Enteropathy type T-cell lymph",
			"High-grade B-cell lymph., wit",
			"Large cell lymphoma (LCL)",
			"Lymphoma / sarcoma",
			"Mycosis fungoides (Sezary syn",
			"Nodal margina zone B-cell lym",
			"Other B-cell lymphoma, Not Sp",
			"Primary Diffuse Large B-cell",
			"T-cell / histiocytic rich lar") then
		disease="Lymphomas";
	/* Everything else, including blank diagnoses, lands in one bucket. This
	   covers CLL, CML, CMML, MDS, multiple myeloma, myelofibrosis, the
	   myeloproliferative diagnoses, CGD, CVID, PNH and plasmacytoid dendritic
	   cell neoplasm. */
	else disease="MDS/MPNs/Other/Unknown";

	/* race collapsed to four levels */
	select (race);
		when ("White") race2="White";
		when ("Black") race2="Black";
		when ("Asian") race2="Asian";
		otherwise race2="Other/Unknown";
	end;

	/* NOTE(review): the literal is "Ye", not "Yes" -- presumably matching a
	   truncated value. Confirm against the data. */
	if hispanic="Ye" then hispanic2="Hispanic";
	else hispanic2="Non/Unknown";

	if TBI="Yes" then conditioning="TBI";
	else conditioning="Chemo only";

	/* Tac+MTX requires FK506 and MTX to each appear in one of the first two
	   prophylaxis slots. Anything else is Other. */
	if (GVHD_ppx1="FK506" or GVHD_ppx2="FK506")
		and (GVHD_ppx1="MTX" or GVHD_ppx2="MTX") then gvhdp="Tac+MTX";
	else gvhdp="Other";

	/* manual overrides: four listed MRNs are forced to Other, and every
	   remaining treated patient is forced to Tac+MTX */
	if MRN in ("TK7802" "D1949759" "D1878227" "D1948415") then gvhdp="Other";
	else if pasiro=1 then gvhdp="Tac+MTX";

	/* hard-coded values for the record whose MRN is the literal "MGH" */
	if MRN="MGH" then do;
		gvhdp="Tac+MTX";
		conditioning="Chemo only";
		Age_at_Tp=66;
	end;
run;

/* ---- 2020 update: survival sheet becomes the base cohort ----------------
   The work table COHORT is replaced by the rows of the survival workbook that
   have an MRN. The derived labels from cohortd2 are then joined on by MRN. */
proc import out=extra
		datafile='H:\BMT\Pasireotide\data\hari\0217\UpdatedCaseControlList02142020_YiSurvival.xlsx'
		dbms=excel replace;
	getnames=yes;
	sheet="CheckingWork";
run;

data cohort;
	set extra;
	if mrn = "" then delete;
run;

/* per-MRN labels carried forward from the labeling step */
data mrninfo(keep=mrn disease conditioning gvhdp hispanic race2 Cell_Type d2engraft);
	set cohortd2;
run;

proc sort data=cohort;
	by mrn;
run;
proc sort data=mrninfo;
	by mrn;
run;

data cohort4;
	merge cohort(in=in_survival) mrninfo;
	by mrn;
	/* keep only MRNs present in the survival sheet */
	if in_survival;
	group=category;
	if gender="2" then gender="F";
run;

*Create table 1;
/* The %means and %freqs macros are defined in the two included files, which
   are not part of this program. Their parameter meanings are therefore not
   visible here. */
%include "H:\R SAS help\macros\freqs - 20151020.sas";
%include "H:\R SAS help\macros\means - 20170227.sas";


/* Continuous characteristics by group. Both calls name means_allpts as their
   last argument -- presumably an output table that the macro appends to
   (TODO confirm in the macro source).
   cohort4 at this point is the survival sheet joined with the derived labels.
   It is overwritten by a later step in this program. */
%means(cohort4,,AGE,Age,group,anova,means_allpts);
%means(cohort4,,d2engraft,Days to engraftment,group,anova,means_allpts);

/* Categorical characteristics by group. Each call names a test (chisq or
   fisher) and freqs_allpts as its last argument. */
%freqs(cohort4,,Gender,Gender,group,chisq,Chi-Square,ChiSq,freqs_allpts);
%freqs(cohort4,,Race2,Race,group,fisher,XP2_FISH,FishersExact,freqs_allpts);
%freqs(cohort4,,Hispanic,Hispanic,group,fisher,XP2_FISH,FishersExact,freqs_allpts);

%freqs(cohort4,,disease,Disease,group,chisq,Chi-Square,ChiSq,freqs_allpts);
%freqs(cohort4,,conditioning,Conditioning,group,chisq,Chi-Square,ChiSq,freqs_allpts);

%freqs(cohort4,,gvhdp,GVHD Prophy,group,chisq,Chi-Square,ChiSq,freqs_allpts);


%freqs(cohort4,,GRAFT,GRAFT,group,fisher,XP2_FISH,FishersExact,freqs_allpts);
/* Stack the continuous and categorical summaries into one table for
   reporting. Rows for the Age variable that have no level label get the label
   "Median (IQR)". */
data patchars;
	length stat $50.;
	set means_allpts freqs_allpts;
	if var="Age" and value="" then value="Median (IQR)";
	/* disabled: recode missing p-values to -1
	   if pval=. then pval=-1 */
run;

/* Write the patient-characteristics table (Table 1) to an RTF file.
   NOTE(review): the output path is under H:\BMT\landmark, while every input
   in this program is under H:\BMT\Pasireotide -- confirm the destination. */
ods escapechar = '^';
ods rtf file = 'H:\BMT\landmark\output\table1.rtf';
proc report data=patchars nowd;
	/* one stat column per level of pop (ACROSS), followed by the p-value */
	columns var value pop, (stat) pval dummy;
	/* var is hidden as a column and printed as a heading line instead */
	define var / "" group order=data noprint;
	define value / "" group order=data;
	define pop / "" across;
	define stat / "N (%)" display;
	define pval / "P-Value" group;
	/* dummy is not created anywhere in this program -- presumably supplied by
	   the macro output tables (TODO confirm) */
	define dummy / noprint;
	/* heading line with the characteristic name before each block of rows */
	compute before var;
		line @1 var $50.;
	endcomp;
	title "Patient Characteristics";
run;
ods rtf close;

/* ---- Survival analysis inputs -------------------------------------------
   SURVIVE: outcome columns from the survival sheet plus a derived relapse
   indicator. */
data survive(keep=MRN CATEGORY MONTHSTOLASTCONTACT_ASOF081219 DEAD1ALIVE0_ASOF081219
		RFS_MONTHSTORELAPSEORDEATH_ASOF0 RELAPSEORDEATH1NO0_ASOF081219 relapse AGE);
	set cohort;
	/* Rows outside the Pasireotide category use the recorded indicator as is.
	   Pasireotide rows derive it from the two follow-up times:
	     RFS time equals last-contact time  -> the death indicator
	     RFS time is before last contact    -> 1
	     otherwise                          -> 999 (placeholder)
	   NOTE(review): 999 is not a censoring value in the later PHREG model, so
	   any such row would count as an event -- confirm that none exist. */
	if CATEGORY ne "Pasireotide" then
		relapse=RELAPSEORDEATH1NO0_ASOF081219;
	else if MONTHSTOLASTCONTACT_ASOF081219=RFS_MONTHSTORELAPSEORDEATH_ASOF0 then
		relapse=DEAD1ALIVE0_ASOF081219;
	else if MONTHSTOLASTCONTACT_ASOF081219>RFS_MONTHSTORELAPSEORDEATH_ASOF0 then
		relapse=1;
	else
		relapse=999;
run;

/* COHORT4 is rebuilt here as the modelling table. Times are converted from
   months to days with a 30.5 days-per-month factor. */
data cohort4;
	set survive;
	dod   = MONTHSTOLASTCONTACT_ASOF081219 * 30.5;
	dore  = RFS_MONTHSTORELAPSEORDEATH_ASOF0 * 30.5;
	dead  = DEAD1ALIVE0_ASOF081219;
	group = category;
run;
/* ---- Cause of death -----------------------------------------------------
   Import the cause-of-death sheet, collapse its flags into one code per row,
   and attach that code to the modelling table. */
proc import out=cause
		datafile='H:\BMT\Pasireotide\data\hari\0217\CauseOfDeathForYi_SummaryFrom07062020Sheet_HR07072020.xlsx'
		dbms=excel replace;
	getnames=yes;
	sheet="Sheet1";
run;

/* cause code: 3 = other NRM, 2 = GVHD, 1 = relapse. When more than one flag
   is set the highest code wins. */
data cause(keep=mrn cause);
	set cause;
	if Other_NRM=1 then cause=3;
	else if GVHD=1 then cause=2;
	else if Relapse=1 then cause=1;
run;

proc sort data=cause;
	by MRN;
run;
proc sort data=cohort4;
	by mrn;
run;

/* Rows with no cause code get 0. There is no IN= filter, so an MRN that
   appears only in the cause sheet also produces a row. */
data cohort5;
	merge cohort4 cause;
	by mrn;
	if cause=. then cause=0;
run;
*Competing risk model;
/* Cumulative incidence by group, then a competing-risks regression, both on
   cohort5 with cause=0 treated as censored.
   DATA= is given explicitly on PROC LIFETEST. Without it the procedure uses
   whichever dataset was created most recently, which silently changes if any
   step is inserted or rerun above.
   NOTE(review): LIFETEST uses failcode=3 while PHREG uses eventcode=1, so the
   two steps target different events of interest -- confirm this is intended. */
proc lifetest data=cohort5;
	time dod * cause(0)/failcode=3;
	strata group;
run;
proc phreg data=cohort5 plots(overlay=stratum)=cif;
	class group;
	model dod * cause(0) = group/eventcode=1;
run;
*CPH model;
/* Two Cox models on cohort4, each adjusted for group and AGE.

   Each model writes its ODS tables to its own datasets. Previously both ODS
   OUTPUT statements used the same three names, so the second model
   overwrote the first and the table titled "Adjusted Overall Survival" was
   built from the relapse model.

   Overall survival: time dod, event dead (0 = censored). These are the tables
   read by the reporting steps that follow. */
ods output ModelANOVA=t3_survrecept ParameterEstimates=pe_survrecept ClassLevelInfo=refdata;
proc phreg data=cohort4;
	class group;
	model dod * dead(0) = group AGE / risklimits type3;
run;

/* Relapse-free survival: time dore, event relapse (0 = censored). Results are
   kept under separate names (t3_rfs, pe_rfs, refdata_rfs).
   NOTE(review): every nonzero value of relapse counts as an event, including
   the 999 placeholder assigned when relapse is derived -- confirm that no
   such rows reach this model. */
ods output ModelANOVA=t3_rfs ParameterEstimates=pe_rfs ClassLevelInfo=refdata_rfs;
proc phreg data=cohort4;
	class group;
	model dore * relapse(0) = group AGE / risklimits type3;
run;
/* Remember the row order of the Type 3 table so that effects can be listed
   in that order in the final report. */
data order(keep=effect order);
	set t3_survrecept;
	order=_N_;
run;

 /* Build one row per CLASS variable from the class-level table (refdata).
    The kept row is the level whose design columns contain no "1" --
    presumably the reference level under the default coding (TODO confirm).
    Output columns: effect (the class variable name) and classval0 (the
    level value). */
 data refd (keep=classval0 effect);
	set refdata;
	n=_N_;
	/* The class name is blank on continuation rows. Walk backwards through
	   refdata with direct access until a non-blank name is found, which
	   fills the name down. */
	if class="" then do;
		do until (class ne "");
		n=n-1;
		set refdata(keep=class) point=n;
		end;
	end;
	/* y spans every variable whose name starts with x (the design columns).
	   A row is dropped as soon as any of them equals "1". */
	array y{*} $ x:;
	do i=1 to dim(y);
		if y{i}="1" then delete;
	end;
	/* rename so the rows line up with the parameter-estimate table */
	rename class=effect;
	rename value=classval0;
run;
proc sort data=refd;
	by effect classval0;
run;

/* Format the parameter estimates for display: effect name, "HR (low - high)"
   text rounded to three decimals, and the per-level p-value. Parameters with
   no class level are labelled Continuous. */
data pe_survrecept_2;
	set pe_survrecept;
	effect=parameter;
	hr_ci=strip(round(HazardRatio,0.001))||" ("||strip(round(HRLowerCL,0.001))||" - "||strip(round(HRUpperCL,0.001))||")";
	pval=probchisq;
	if classval0="" then classval0="Continuous";
	keep effect classval0 hr_ci pval;
run;

proc sort data=pe_survrecept_2;
	by effect;
run;
proc sort data=t3_survrecept;
	by effect;
run;

/* attach the Type 3 (overall) p-value of each effect to its estimate rows */
data surv_full_recept;
	merge pe_survrecept_2
	      t3_survrecept(keep=effect probchisq);
	by effect;
run;
/* Append the rows from refd. They carry no hazard ratio, so they are marked
   with the text -REF-. */
data survfull2;
	set surv_full_recept refd;
	if hr_ci="" then hr_ci="-REF-";
run;

proc sort data=survfull2;
	by effect hr_ci;
run;
/* Fill in the overall p-value on rows that lack one by looking ahead to the
   next row that has it.
   Changes from the earlier form of this step:
     - probchisq is numeric, so it is tested with MISSING() instead of being
       compared to a character constant, which forced a type conversion
     - the look-ahead stops at the last row (NOBS=), since reading past the
       end with POINT= never fills probchisq and the loop would not terminate
   n ends up holding the row number the p-value was taken from. */
data survfull3;
	set survfull2 nobs=n_rows;
	n=_N_;
	do while (missing(probchisq) and n < n_rows);
		n=n+1;
		set survfull2(keep=probchisq) point=n;
	end;
run;
proc sort data=survfull3;
	by effect classval0;
run;

/* Number the rows within each effect and turn the p-values into display
   text. -REF- rows show only the overall p-value, and all other rows show
   only their own p-value. */
data survfull4;
	set survfull3;
	length overallp pvalchar$50.;
	by effect classval0;

	/* running row counter within effect (the sum statement retains cnt) */
	if first.effect then cnt=0;
	cnt+1;

	overallp=put(probchisq,pvalue5.3);
	if overallp="<.001" then overallp="<0.001";
	pvalchar=put(pval,pvalue5.3);
	if pvalchar="<.001" then pvalchar="<0.001";

	if hr_ci="-REF-" then pvalchar="";
	else overallp="";
run;

/* -REF- rows get counter 0 so they sort first within their effect. This is
   done in a second pass so the running counter above is not disturbed. */
data survfull4;
	set survfull4;
	if hr_ci="-REF-" then cnt=0;
run;
proc sort data=survfull4;
	by effect;
run;
proc sort data=order;
	by effect;
run;

*customize here;
/* Attach the display order and a readable label (effect2) to every row.
   The label defaults to the effect name itself. The lookup ignores case: the
   model names its covariate AGE in upper case, which a case-sensitive
   comparison with "age" does not match. Add further labels as new WHEN
   branches, written in lower case. */
data survfull5;
	merge survfull4 order;
	by effect;
	length effect2 $50.;
	effect2=effect;
	select (lowcase(effect));
		when ("b_surg")   effect2="Surgery Type";
		when ("surgrad")  effect2="Surgery/Radiation";
		when ("age")      effect2="Age";
		when ("race_eth") effect2="Race/Ethnicity";
		when ("ins")      effect2="Insurance Status";
		when ("chemo")    effect2="Chemotherapy";
		otherwise;
	end;
run;
/* Print the adjusted hazard-ratio table. order and cnt are hidden sort keys
   (effect order, then row within effect), and effect2 is printed as a heading
   line above each block of rows.
   NOTE(review): the rows come from whichever PHREG run last wrote
   t3_survrecept, pe_survrecept and refdata -- confirm that it is the
   overall-survival model named in the title. */
proc report data=survfull5 nowd missing;
      columns order effect2 cnt classval0 hr_ci pvalchar overallp ;
		define order/order order=internal noprint;
      define effect2 / "" group order=data noprint;
	  	  define cnt/order order=internal noprint;
      define classval0 / "" group order=data RIGHT;
      define hr_ci / "HR (95% CI)"  order=data center;
      define pvalchar / "P-Value" display RIGHT;
      define overallp/ "Overall P-Value" group RIGHT;
	  /* heading line with the effect label before its rows */
	  compute before effect2;
		line @1 effect2 $50.;
	endcomp;
      title "Adjusted Overall Survival ";
run;

